# ===============================================================#
#                     Replication files for:                     #
#.  "Attitudinal and Behavioral Legacies of Wartime Violence:    #
#                      A Meta-Analysis"                          #
#                        Joan Barceló                            #
#               American Political Science Review                #
#               Last update: September 3, 2025                   #
# ===============================================================#

#################################
# Figure G.1: P-curves for all outcomes
################################

## ------------------------ Load Packages ------------------------

library(dplyr)
library(ggplot2)
library(tidyr)
library(readr)
library(metafor)
library(RColorBrewer)
library(purrr)
library(patchwork)
library(grid)
library(tibble)


# ---------- Paths and file maps ----------
BASE <- "~/Datasets/"

# Outcome -> filename maps (Full)
files_full <- c(
  "Social groups participation" = "meta.groups.csv",
  "Political participation"     = "meta.political_participation.csv",
  "Voting"                      = "meta.voting.csv",
  "Generalized trust"           = "meta.generalized_trust.csv",
  "Interest / knowledge"        = "meta.political_interest.csv",
  "Normative prosociality"      = "meta.normative.csv",
  "Community leadership"        = "meta.leadership.csv",
  "Altruism"                    = "meta.altruism.csv",
  
  "Attitudes toward wartime enemies" = "meta.warenemies.csv",
  "Threat perceptions"               = "meta.threat.csv",
  "Intergroup attitudes"             = "meta.intergroup.csv",
  
  "Ingroup trust"        = "meta.ingroup_trust.csv",
  "Group voting"         = "meta.groupvoting.csv",
  "Group identification" = "meta.groupid.csv",
  
  "Political intolerance"        = "meta.political_intolerance.csv",
  "Authoritarian attitudes"      = "meta.authoritarian.csv",
  "Hawkish security preferences" = "meta.hawkish.csv",
  "Support for punitive justice" = "meta.punitive.csv",
  "Extreme ideology"             = "meta.xtrideology.csv",
  "Social intolerance"           = "meta.social_intolerance.csv",
  "Antagonism toward peace"      = "meta.antipeace.csv",
  "Institutional distrust"       = "meta.instmistrust.csv"
)

# Bivariate and quasi experimental file maps
files_biv <- sub("\\.csv$", "_biv.csv", files_full)
files_qe  <- sub("\\.csv$", "_random.csv", files_full)

# Exclude QE outcomes that cannot run
files_qe  <- files_qe[!names(files_qe) %in% c("Normative prosociality", "Political intolerance")]

## ------------------------ Helpers ------------------------

process_outcome <- function(df, outcome_name) {
  if (is.null(df) || nrow(df) == 0) return(NA)
  if (!all(c("coef", "se") %in% names(df))) return(NA)
  
  df <- df %>%
    mutate(
      coef = suppressWarnings(as.numeric(coef)),
      se   = suppressWarnings(as.numeric(se))
    ) %>%
    filter(is.finite(coef), is.finite(se), se > 0)
  if (nrow(df) == 0) return(NA)
  
  meta_data <- df %>%
    transmute(
      z_score = coef / se,
      p_value = 2 * (1 - pnorm(abs(z_score))),
      outcome = outcome_name
    ) %>%
    filter(p_value <= 0.05) %>%
    mutate(
      p_value_group = cut(
        p_value,
        breaks = seq(0, 0.05, by = 0.01),
        include.lowest = TRUE,
        labels = c("0.01", "0.02", "0.03", "0.04", "0.05")
      ),
      distribution = "Observed"
    )
  
  out <- meta_data %>%
    count(distribution, p_value_group, name = "count") %>%
    group_by(distribution) %>%
    mutate(proportion = count / sum(count)) %>%
    ungroup() %>%
    mutate(outcome = outcome_name) %>%
    complete(
      distribution = "Observed",
      p_value_group = factor(c("0.01","0.02","0.03","0.04","0.05"),
                             levels = c("0.01","0.02","0.03","0.04","0.05")),
      fill = list(count = 0, proportion = 0, outcome = outcome_name)
    )
  out
}

load_panel_observed <- function(file_map) {
  pcs <- lapply(names(file_map), function(outcome) {
    f <- file.path(BASE, file_map[[outcome]])
    if (!file.exists(f)) {
      warning(sprintf("File not found for %s: %s", outcome, f))
      return(NA)
    }
    df <- readr::read_csv(f, show_col_types = FALSE, progress = FALSE)
    process_outcome(as.data.frame(df), outcome)
  })
  pcs %>% purrr::compact() %>% bind_rows() %>% filter(distribution == "Observed")
}

## ------------------------ Simulated reference curves ------------------------
set.seed(123)
uniform_null_p_values <- runif(1000, 0.01, 0.05)
alt_null_p_values     <- c(runif(670, 0.01, 0.05), runif(330, 0.001, 0.01))
uniform_null_data     <- tibble::tibble(p_value = uniform_null_p_values, distribution = "Uniform Null")
alt_null_data         <- tibble::tibble(p_value = alt_null_p_values,     distribution = "Alt. Null")

alt_null_p_value_proportions <- alt_null_data %>%
  filter(p_value <= 0.05) %>%
  mutate(
    p_value_group = cut(
      p_value,
      breaks = seq(0, 0.05, by = 0.01),
      include.lowest = TRUE,
      labels = c("0.01", "0.02", "0.03", "0.04", "0.05")
    )
  ) %>%
  count(p_value_group, name = "count") %>%
  mutate(proportion = count / sum(count)) %>%
  complete(
    p_value_group = factor(c("0.01","0.02","0.03","0.04","0.05"),
                           levels = c("0.01","0.02","0.03","0.04","0.05")),
    fill = list(count = 0, proportion = 0)
  )

## ------------------------ Load data for each panel ------------------------
all_observed_full   <- load_panel_observed(files_full)
all_observed_biv    <- load_panel_observed(files_biv)
all_observed_random <- load_panel_observed(files_qe)

## ------------------------ Colors and plotting ------------------------
# Fixed set of outcomes used for the legend across panels
outcome_levels <- sort(unique(c(names(files_full), names(files_biv), names(files_qe))))
pal_base <- brewer.pal(8, "Set2")
pal      <- colorRampPalette(pal_base)(length(outcome_levels))
outcome_colors <- setNames(pal, outcome_levels)
legend_rows <- 8

make_pcurve_plot <- function(observed_df,
                             outcome_colors,
                             outcome_levels,
                             legend = c("none", "bottom"),
                             legend_rows = 2) {
  legend <- match.arg(legend)
  stopifnot(nrow(observed_df) > 0)
  alt_max_y <- min(max(alt_null_p_value_proportions$proportion, na.rm = TRUE) + 0.05, 1)
  
  ggplot() +
    geom_line(
      data = observed_df,
      aes(x = as.numeric(as.character(p_value_group)),
          y = proportion,
          color = outcome,
          group = outcome),
      linewidth = 1
    ) +
    geom_line(
      data = alt_null_p_value_proportions,
      aes(x = as.numeric(as.character(p_value_group)), y = proportion),
      linewidth = 1.3, color = "black"
    ) +
    geom_hline(yintercept = 0.2, color = "grey50", linewidth = 0.8) +
    ggplot2::annotate("text", x = 0.05,  y = 0.2,
             label = "Uniform Null Distribution",
             color = "grey30", hjust = 1, vjust = -0.6, fontface = "italic") +
    ggplot2::annotate("text", x = 0.011, y = alt_max_y,
             label = "Alt. Null\nDistribution",
             color = "black", hjust = 0, vjust = 1, fontface = "italic") +
    scale_x_continuous(breaks = seq(0.01, 0.05, by = 0.01), limits = c(0.01, 0.05)) +
    scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
    scale_color_manual(
      values = outcome_colors,
      limits = outcome_levels,
      breaks = outcome_levels,
      drop   = FALSE
    ) +
    guides(color = guide_legend(
      nrow = legend_rows, byrow = TRUE,
      override.aes = list(linewidth = 1.2),
      keywidth = unit(14, "pt"), keyheight = unit(10, "pt")
    )) +
    labs(x = "p-value", y = "Proportion of studies", color = NULL) +
    theme_minimal(base_size = 12) +
    theme(
      legend.position = legend,
      legend.box = "horizontal",
      legend.margin = margin(4, 6, 4, 6),
      legend.text = element_text(size = 9),
      axis.title.x = element_text(face = "bold"),
      axis.title.y = element_text(face = "bold")
    )
}

## ------------------------ Build plots ------------------------
# Panel A, full
p_curve_all_combined_withlegend <- make_pcurve_plot(all_observed_full, outcome_colors, outcome_levels, legend = "bottom", legend_rows = legend_rows)
p_curve_all_combined            <- make_pcurve_plot(all_observed_full, outcome_colors, outcome_levels, legend = "none",   legend_rows = legend_rows)

# Panel B, bivariate
p_curve_all_combined_biv_withlegend <- make_pcurve_plot(all_observed_biv, outcome_colors, outcome_levels, legend = "bottom", legend_rows = legend_rows)
p_curve_all_combined_biv            <- make_pcurve_plot(all_observed_biv, outcome_colors, outcome_levels, legend = "none",   legend_rows = legend_rows)

# Panel C, quasi experimental
p_curve_all_combined_random_withlegend <- make_pcurve_plot(all_observed_random, outcome_colors, outcome_levels, legend = "bottom", legend_rows = legend_rows)
p_curve_all_combined_random            <- make_pcurve_plot(all_observed_random, outcome_colors, outcome_levels, legend = "none",   legend_rows = legend_rows)

## ------------------------ Three single panels (preview) ------------------------
Figure_G1A <- p_curve_all_combined_withlegend + labs(title = "A) Full Models")
Figure_G1B <- p_curve_all_combined_biv_withlegend + labs(title = "B) Bivariate Models")
Figure_G1C <- p_curve_all_combined_random_withlegend + labs(title = "C) Quasi experimental Models")

print(Figure_G1A)
print(Figure_G1B)
print(Figure_G1C)
